This script applies the mSigAct signature presence test (Ng et al. 2017) to evaluate the hypothesis that the mutational signature identified in OSCC 62074759 (the A^nT signature) is present in candidate tumors. Candidate tumors were previously identified as tumors showing strong enrichment for mutations with the characteristics of A^nT, as described in Supplemental Data 1 of this manuscript.
We start with the following input data:
`candidates` contains the tumors identified using the script in Supplementary Data 1 (this is the ‘results’ object from that script).
The `PCAWG7 attributions` folder contains 4 signature attribution files, one for each of the 4 major groups of tumors in the PCAWG7 analysis of mutational signatures (Alexandrov et al., 2018; downloaded from https://www.synapse.org/#!Synapse:syn11804065 on June 24th 2018).
First, for each of the candidates, extract the PCAWG7 signature assignments, which will be used later in the analysis.
library(stringr)
## read the table of candidate tumors (tab-separated)
candidates <- read.delim("candidatesW_AnT.txt", as.is = TRUE)
## the final row is 62074759 itself: drop it
last_row <- nrow(candidates)
candidates <- candidates[-last_row, ]
## the sample identifier is the part of the row name in front of "___"
name_parts <- str_split_fixed(rownames(candidates), "___", 2)
candidates$sample <- name_parts[, 1]
## tumor types are annotated differently in the candidates table, so replace
## each tumor type name (row name of tumorTypes) by its `annotation`
tumorTypes <- read.delim("tumorTypes.txt", as.is = TRUE, row.names = 1)
for (k in seq_len(nrow(tumorTypes))) {
  candidates$sample <- gsub(
    rownames(tumorTypes)[k],
    tumorTypes$annotation[k],
    candidates$sample
  )
}
## record, per data type, which attribution file holds the sample
attribution_source <- c(
  WES_Other = "nonPCAWG_WES",
  WES_TCGA  = "TCGA_WES",
  WGS_Other = "nonPCAWG_WGS",
  WGS_ICGC  = "PCAWG_WGS"
)
for (data_type in names(attribution_source)) {
  candidates$attributions[candidates$dataType == data_type] <-
    attribution_source[[data_type]]
}
## collect, from every attribution file, the rows that belong to a candidate;
## a row is identified by "<Cancer.Types>::<Sample.Names>"
files <- list.files("PCAWG7 attributions", full.names = TRUE)
candidate_rows <- lapply(files, function(path) {
  tbl <- read.csv(path, as.is = TRUE)
  keys <- paste0(tbl$Cancer.Types, "::", tbl$Sample.Names)
  tbl[keys %in% candidates$sample, ]
})
## stack the per-file selections in file order (stays NULL when there are no files)
attributions <- Reduce(rbind, candidate_rows, NULL)
source("scr/mSigTools.v0.12.R")
## Loading required package: SnowballC
source("scr/mSigAct.v0.10.R")
## Warning: package 'nloptr' was built under R version 3.4.4
## Warning: package 'sets' was built under R version 3.4.4
##
## Attaching package: 'sets'
## The following object is masked from 'package:stringr':
##
## %>%
## load signatures: get.signatures() returns an exome and a genome version
cosmic.sigs <- get.signatures(
  signature.file = "PCAWG7and62074759_96.tsv",
  exome.op = .h19.96.sureselect.v6.op
)
cosmic.wes <- cosmic.sigs$exome
cosmic.wgs <- cosmic.sigs$genome
rm(cosmic.sigs)

# Downstream mSigAct requires that the elements of each signature (column)
# sum to exactly 1. Eventually this should move into mSigAct, or the check
# should become an all.equal() style comparison with a tolerance.
# One division is not always enough, presumably because of rounding, so the
# rescaling is applied three times before the exact check.
rescale.sig.columns <- function(sig.matrix) {
  sweep(sig.matrix, MARGIN = 2, STATS = colSums(sig.matrix), FUN = "/")
}

cosmic.wes <- rescale.sig.columns(rescale.sig.columns(rescale.sig.columns(cosmic.wes)))
stopifnot(colSums(cosmic.wes) == 1)

cosmic.wgs <- rescale.sig.columns(rescale.sig.columns(rescale.sig.columns(cosmic.wgs)))
stopifnot(colSums(cosmic.wgs) == 1)

## load the catalogs of the candidate tumors (tab-separated)
exomes <- read.delim("catalogs/spectrum_counts_exomes.txt", as.is = TRUE)
genomes <- read.delim("catalogs/spectrum_counts_genomes.txt", as.is = TRUE)
Define a function that runs mSigAct on a single sample, using the signatures attributed to that sample in the PCAWG analysis, plus SBS1, SBS5 and SBS40 (always included), and AnT.
## Run the mSigAct signature presence test for AnT on one sample.
##
## Uses the global objects `attributions`, `candidates`, `cosmic.wgs` /
## `cosmic.wes` and `genomes` / `exomes`, and calls `process.one.group()`,
## which writes its output under "mSigAct_output/<smp>".
##
## smp:    sample name as found in attributions$Sample.Names
## return: a one-row matrix with row name `smp`; the first column is `pval`,
##         the remaining columns are the contents of `analysis$exposure`
##         (presumably one exposure per signature tested - confirm in mSigAct)
run.mSigAct.per.smp<-function(smp){
  ## signatures with a non-zero PCAWG attribution for this sample
  ## (columns 4 and onwards of `attributions` are treated as signatures)
  tmp <- attributions[attributions$Sample.Names == smp, 4:ncol(attributions)]
  if (nrow(tmp) == 0) {
    stop("sample not found in attributions: ", smp, call. = FALSE)
  }
  sigs <- colnames(tmp)[tmp[1, ] != 0]
  ## signatures SBS1, SBS5 and SBS40 are present in all tumors
  ## therefore regardless of previous assignments,
  ## add SBS1, SBS5 and SBS40 in this analysis (appended in that order)
  sigs <- union(sigs, c("SBS1", "SBS5", "SBS40"))

  ## look up the data type of the sample; candidates$sample looks like
  ## "<tumor type>::<sample name>", so prefer an exact match on the name and
  ## only fall back to a literal (non-regex) substring search
  hits <- which(endsWith(candidates$sample, paste0("::", smp)))
  if (length(hits) == 0) {
    hits <- grep(smp, candidates$sample, fixed = TRUE)
  }
  ## keep the part in front of the first "_" ("WGS_ICGC" -> "WGS");
  ## `dataType` is spelled out instead of relying on `$` partial matching
  dataType <- unique(sub("_.*$", "", candidates$dataType[hits]))
  if (length(dataType) != 1 || is.na(dataType)) {
    stop("cannot determine a unique data type (WGS/WES) for sample ", smp,
         call. = FALSE)
  }

  ## depending on data type, use WGS or WES signatures
  if (dataType == "WGS") {
    universe <- cosmic.wgs
    catalog <- genomes
  } else {
    universe <- cosmic.wes
    catalog <- exomes
  }
  subverse <- universe[, c(sigs, "AnT")]

  ## ensure that catalog and signatures are in the same order
  rownames(catalog) <- paste0(catalog$Before, catalog$Ref,
                              catalog$After, catalog$Var)
  subverse <- subverse[rownames(catalog), ]

  ## find the catalog column of this sample; read.csv() turned "-" into "."
  ## in the column names, so search for the converted name, exact match first
  ## and a literal substring match as fallback
  col_hits <- which(colnames(catalog) == make.names(smp))
  if (length(col_hits) == 0) {
    col_hits <- grep(gsub("-", ".", smp, fixed = TRUE), colnames(catalog),
                     fixed = TRUE)
  }
  if (length(col_hits) != 1) {
    stop("expected exactly one catalog column for sample ", smp,
         ", found ", length(col_hits), call. = FALSE)
  }
  input <- as.matrix(catalog[[col_hits]])
  colnames(input) <- smp
  rownames(input) <- rownames(subverse)

  analysis <- process.one.group(input,
                                subverse,
                                target.sig.name = "AnT",
                                path.root = paste0("mSigAct_output/", smp),
                                obj.fun = obj.fun.nbinom.maxlh,
                                nbinom.size = 10, ## = dispersion parameter
                                mc.cores = 1)     ## = number of cores

  ## results: p value first, then the exposures, as a single row
  df <- t(rbind(pval = analysis$pval, analysis$exposure))
  rownames(df) <- smp
  df
}
## make a df to save the attributions in: start from the PCAWG attributions
## and add a column for the AnT exposure and one for the mSigAct p value
mSigAct_result <- attributions
mSigAct_result$AnT <- NA
mSigAct_result$mSigAct_pval <- 0
for (smp in attributions$Sample.Names) {
  df <- as.data.frame(run.mSigAct.per.smp(smp))
  smp_rows <- mSigAct_result$Sample.Names == smp
  mSigAct_result$mSigAct_pval[smp_rows] <- df$pval
  ## every column of df except `pval` is an exposure to store
  sig_cols <- setdiff(colnames(df), "pval")
  unknown <- setdiff(sig_cols, colnames(mSigAct_result))
  if (length(unknown) > 0) {
    stop("no column in mSigAct_result for: ",
         paste(unknown, collapse = ", "), call. = FALSE)
  }
  ## assign by column NAME: data frame replacement is positional, and the
  ## columns of df are in the order the signatures were passed to mSigAct
  ## (SBS1/SBS5/SBS40 may have been appended at the end), which is not
  ## necessarily the column order of mSigAct_result
  mSigAct_result[smp_rows, sig_cols] <- df[1, sig_cols]
}
## remove signatures that are not attributed to any sample:
## keep column 2 plus every column after the third whose sum is positive
mSigAct_result<-mSigAct_result[c(2,which(colSums(mSigAct_result[,-c(1:3)])>0)+3)]
## perform multiple testing correction (Benjamini-Hochberg)
mSigAct_result$mSigAct_qval <- p.adjust(mSigAct_result$mSigAct_pval, method = "BH")
## format to print output table to html
## exposure columns = everything except the first column and the last two
## (p value and q value); head(..., -2) is empty when there is no such column,
## whereas 2:(ncol - 2) would count backwards
exposure_cols <- head(seq_len(ncol(mSigAct_result))[-1], -2)
for (i in exposure_cols) {
  mSigAct_result[,i]<-as.numeric(format(as.numeric(mSigAct_result[,i]),digits=0,scientific=FALSE))
}
mSigAct_result$mSigAct_pval <- format(mSigAct_result$mSigAct_pval, digits = 3)
mSigAct_result$mSigAct_qval <- format(mSigAct_result$mSigAct_qval, digits = 3)
library(knitr)
## print everything except the second-to-last column (the raw p value);
## first column left-aligned, all others centered, however many there are
printed <- mSigAct_result[, -(ncol(mSigAct_result) - 1)]
kable(printed, row.names = FALSE, align = c("l", rep("c", ncol(printed) - 1)))
| Sample.Names | SBS1 | SBS2 | SBS5 | SBS7a | SBS7d | SBS9 | SBS10a | SBS10b | SBS13 | SBS14 | SBS15 | SBS17a | SBS17b | SBS18 | SBS27 | SBS28 | SBS37 | SBS40 | SBS45 | SBS57 | AnT | mSigAct_qval |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| BD121T | 117 | 0 | 145 | 0 | 0 | 0 | 1119 | 833 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 70 | 4.69e-03 |
| BD173T | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 344 | 879 | 3.42e-39 |
| BD182T | 349 | 0 | 879 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 877 | 2.68e-69 |
| BD223T | 0 | 0 | 160 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 80 | 0 | 0 | 182 | 2.73e-16 |
| sysucc-311T | 0 | 0 | 745 | 0 | 0 | 0 | 3913 | 7406 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1864 | 0 | 0 | 0 | 0 | 231 | 8.51e-04 |
| ESO-173 | 64 | 0 | 82 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 23 | 33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1.00e+00 |
| HCC34T | 12 | 0 | 169 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 68 | 0 | 0 | 0 | 3.34e-01 |
| PCSI_0060_Pa_X | 33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 32 | 47 | 8.02e-07 |
| SKCM-JWCI-WGS-8-Tumor | 8 | 0 | 0 | 591 | 12 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 93 | 0 | 0 | 0 | 9.48e-01 |
| T155 | 37 | 0 | 41 | 0 | 0 | 0 | 467 | 438 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3.34e-01 |
| LP6005935-DNA_B03 | 1798 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 7443 | 17378 | 0 | 0 | 0 | 0 | 13150 | 0 | 0 | 5130 | 2.01e-07 |
| 8069334 | 1385 | 0 | 1169 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 558 | 1329 | 0 | 0 | 0 | 0 | 2177 | 0 | 0 | 484 | 6.47e-03 |
| 0047_CRUK_PC_0047_T1_DNA | 591 | 0 | 898 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1010 | 0 | 0 | 851 | 9.30e-15 |
| SP22031 | 2490 | 0 | 4530 | 0 | 0 | 0 | 124275 | 80170 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 22766 | 0 | 0 | 0 | 0 | 0 | 4.54e-01 |
| SP16886 | 0 | 0 | 12505 | 0 | 0 | 0 | 434277 | 251355 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 151145 | 0 | 0 | 0 | 0 | 0 | 7.33e-01 |
| SP19295 | 4183 | 0 | 6759 | 0 | 0 | 0 | 127419 | 102190 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 17351 | 0 | 0 | 0 | 0 | 1965 | 8.21e-02 |
| SP17905 | 0 | 0 | 30073 | 0 | 0 | 0 | 1215032 | 478155 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 523625 | 185733 | 0 | 0 | 0 | 0 | 8.33e-01 |
| SP21400 | 9932 | 0 | 15411 | 0 | 0 | 0 | 256130 | 446142 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 65424 | 0 | 0 | 0 | 0 | 0 | 1.86e-01 |
| SP18946 | 0 | 0 | 16982 | 0 | 0 | 0 | 490500 | 366755 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 86576 | 0 | 0 | 0 | 0 | 0 | 4.54e-01 |
| SP80615 | 0 | 0 | 57839 | 0 | 0 | 0 | 1070122 | 795822 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 479939 | 0 | 0 | 0 | 0 | 22748 | 2.48e-02 |
| SP81494 | 2341 | 0 | 12182 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2613 | 0 | 0 | 0 | 2963 | 4521 | 0 | 2148 | 7.66e-04 |
| SP81711 | 3789 | 0 | 4065 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3537 | 0 | 0 | 0 | 4975 | 0 | 0 | 3908 | 1.26e-12 |
| SP80754 | 2263 | 0 | 3069 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2079 | 0 | 0 | 0 | 0 | 0 | 0 | 3797 | 1.50e-35 |
| SP111026 | 1768 | 0 | 4528 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 9984 | 23885 | 0 | 0 | 0 | 0 | 3994 | 0 | 0 | 2630 | 1.21e-05 |
| SP111101 | 3450 | 0 | 3123 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1011 | 1607 | 0 | 0 | 0 | 0 | 5476 | 0 | 0 | 2245 | 4.93e-07 |
| SP92659 | 1802 | 0 | 6991 | 0 | 0 | 0 | 191629 | 55154 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 26686 | 0 | 0 | 0 | 0 | 0 | 4.54e-01 |
| TCGA-AB-2824-03B-01W-0728-08 | 70 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1.00e+00 |
| TCGA-AB-2851-03B-01W-0728-08 | 58 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1.00e+00 |
| TCGA-AB-2867-03B-01W-0728-08 | 115 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1.00e+00 |
| TCGA-AB-2868-03B-01W-0728-08 | 201 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1.86e-01 |
| TCGA-FU-A3HZ-01A-11D-A20U-09 | 0 | 0 | 107 | 0 | 0 | 0 | 1418 | 1003 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 655 | 0 | 0 | 0 | 0 | 71 | 1.71e-03 |
| TCGA-AY-4071-01A-01W-1073-09 | 45 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 79 | 0 | 0 | 40 | 8.28e-05 |
| TCGA-AG-3892-01A-01W-1073-09 | 0 | 0 | 91 | 0 | 0 | 0 | 1932 | 1570 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1.00e+00 |
| TCGA-AG-3902-01A-01W-1073-09 | 47 | 0 | 0 | 0 | 0 | 16 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 47 | 0 | 0 | 66 | 2.80e-07 |
| TCGA-2H-A9GM-01A-11D-A37C-09 | 23 | 0 | 81 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | 21 | 43 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.53e-01 |
| TCGA-IG-A4QS-01A-11D-A27G-09 | 89 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 40 | 64 | 0 | 0 | 0 | 0 | 126 | 0 | 0 | 0 | 3.83e-01 |
| TCGA-R6-A6L4-01A-11D-A31U-09 | 8 | 0 | 63 | 0 | 0 | 0 | 0 | 0 | 10 | 0 | 0 | 17 | 44 | 19 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 7.33e-01 |
| TCGA-BA-A4IG-01A-11D-A25Y-08 | 15 | 16 | 0 | 0 | 0 | 0 | 0 | 0 | 24 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 75 | 0 | 0 | 38 | 5.88e-05 |
| TCGA-BR-6453-01A-11D-1800-08 | 49 | 0 | 81 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 32 | 65 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1.00e+00 |
| TCGA-D7-A4Z0-01A-22D-A25D-08 | 37 | 0 | 62 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 97 | 169 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 7.51e-01 |
| TCGA-G2-AA3B-01A-11D-A391-08 | 0 | 352 | 101 | 0 | 0 | 0 | 0 | 0 | 438 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 45 | 4.46e-04 |
| TCGA-GC-A6I3-01A-11D-A31L-08 | 12 | 71 | 105 | 0 | 0 | 0 | 0 | 0 | 90 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 32 | 7.65e-03 |
| TCGA-A5-A0GP-01A-11W-A062-09 | 76 | 0 | 202 | 0 | 0 | 0 | 950 | 804 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 195 | 0 | 0 | 0 | 0 | 0 | 3.34e-01 |
| TCGA-AJ-A5DW-01A-11D-A27P-09 | 97 | 0 | 307 | 0 | 0 | 0 | 3352 | 1499 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 366 | 0 | 0 | 0 | 0 | 0 | 4.15e-01 |
| TCGA-AP-A1E0-01A-11D-A135-09 | 216 | 0 | 325 | 0 | 0 | 0 | 4978 | 2070 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 810 | 0 | 0 | 0 | 0 | 0 | 9.48e-01 |
| TCGA-AX-A1CE-01A-11D-A135-09 | 0 | 0 | 190 | 0 | 0 | 0 | 0 | 0 | 0 | 4221 | 15172 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1.21e-01 |
| TCGA-BK-A6W3-01A-12D-A34Q-09 | 123 | 0 | 335 | 0 | 0 | 0 | 4032 | 1867 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 722 | 0 | 0 | 0 | 0 | 90 | 5.01e-02 |
| TCGA-DF-A2KV-01A-11D-A17W-09 | 38 | 0 | 74 | 0 | 0 | 0 | 1008 | 938 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 286 | 0 | 0 | 0 | 0 | 47 | 2.45e-02 |
| TCGA-E6-A1M0-01A-11D-A142-09 | 56 | 0 | 126 | 0 | 0 | 0 | 1913 | 778 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 296 | 0 | 0 | 0 | 0 | 0 | 4.55e-01 |
| TCGA-EO-A3AV-01A-12D-A19Y-09 | 0 | 0 | 398 | 0 | 0 | 0 | 4037 | 1899 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 990 | 0 | 0 | 0 | 0 | 0 | 2.06e-01 |
| TCGA-EO-A3AY-01A-12D-A19Y-09 | 0 | 0 | 333 | 0 | 0 | 0 | 3805 | 1489 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 686 | 0 | 0 | 0 | 0 | 0 | 1.21e-01 |
| TCGA-EY-A1GD-01A-11D-A13L-09 | 57 | 0 | 173 | 0 | 0 | 0 | 581 | 771 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 274 | 0 | 0 | 0 | 0 | 38 | 8.21e-02 |
| TCGA-EY-A1GI-01A-11D-A13L-09 | 101 | 0 | 421 | 0 | 0 | 0 | 4053 | 2481 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 818 | 0 | 0 | 0 | 0 | 0 | 2.19e-01 |
| TCGA-QF-A5YS-01A-11D-A31U-09 | 115 | 0 | 110 | 0 | 0 | 0 | 1152 | 1141 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 220 | 0 | 0 | 0 | 0 | 50 | 2.48e-02 |
| TCGA-QS-A5YQ-01A-11D-A31U-09 | 58 | 0 | 128 | 0 | 0 | 0 | 857 | 485 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 142 | 0 | 0 | 0 | 0 | 0 | 7.39e-01 |